**************************************************************
***** Name:        do_merge_team1.do      		     	 *****
***** Description: This do file creates data at the	 	 *****
***** team level (part 1)								 *****
**************************************************************

* Start from an empty session: drop data, labels, matrices, scalars and programs
clear all
* Project root directory.
* NOTE(review): this is an absolute, machine-specific path; it has to be edited
* before the do-file is run on another computer.
global path "C:\Users\patry\OneDrive\Desktop\Mario replication files"
* Every relative path used below (Data, Output) is resolved from the project root
cd "$path"
* Do not pause long output with a "more" prompt
set more off


* 1.0 Use ingredient data
* Load the team-level ingredient (performance) file as the master dataset

use "Data/raw data/team_lab.dta", clear

* 2.0 Create round difficulty score

* 2.1 Merge data on recipes
* One-to-one match on team_id, pair and round. No keep() option is given, so
* unmatched observations from either file stay in memory.

merge 1:1 team_id pair round using "Data/raw data/team_recipes.dta"

/* Notes:

 We are merging team-round performance data with the recipe data. (The
 original note describes the recipe file as team-phase level; the merge
 itself is 1:1 on team_id, pair and round.)

 1) _merge == 1: we have team performance but no recipe data.

 There are 90 such observations:

 * All 90 belong to pair 3, for which we did not record screens.

 */

* The merge indicator is not used after this point
 drop _merge
 
* 2.2 Create difficulty measure

* Recipe count: row sum across all recipe_ variables
* (rowtotal treats missing entries as zero)
egen count = rowtotal(recipe_*)

* Numeric version of the string team identifier, needed for the i. factor
* notation in the regression
encode team_id, generate(team_n)

* Working copies of the three controls. The regression uses these copies, so
* they can be overwritten before prediction without touching the originals.
generate temp_count = count
generate temp_round = round
generate temp_team_n = team_n
 
 
* Predict difficulty using team fixed effects
* Estimated on phase-2 observations only. Regressors are the any_ variables,
* the recipe count, and round and team dummies (through the temp_ copies).
* Robust standard errors, clustered on team_id.
reg totalingred any_CMF any_CMM any_CFF any_CCM any_CCF any_MMF any_MFF temp_count i.temp_round i.temp_team_n if phase2==1, r cluster(team_id)
* Store for appendix
estimates store rounddiffprediction

******************************************************
*	   TABLE B16: WEIGHTS FROM PREDICTION ALGORITHM  *
******************************************************

* Export the stored estimates to LaTeX. keep() restricts the table to the any_
* coefficients and temp_count, so the round and team dummies are not printed.
* NOTE(review): stats() names five statistics while fmt() lists six formats, and
* group, ctrlmean and rounds are not added to the stored results in the code
* visible here; confirm whether those rows are meant to be filled.
esttab rounddiffprediction   using Output/Tables/Appendix/TableB16.tex,  keep(any_CMF any_CMM any_CFF any_CCM any_CCF any_MMF any_MFF temp_count) se starlevels( * 0.10 ** 0.05 *** 0.010) stats(r2 N group ctrlmean rounds , label("R2" "Observations" "Group" "Control mean" "Rounds" )  fmt(%9.3f 0 %9.3f 0 %9.3f %9.3f ))  replace


  
* Count, round and team effects are controlled for in the regression but must
* not drive the score. Setting all three working copies to the same value (zero)
* for every observation makes their contribution to the fitted value constant,
* so differences in the prediction come from the any_ variables only.
replace temp_count = 0
replace temp_round = 0
replace temp_team_n = 0
predict round_diff

* Replace the prediction with its overall mean for:
*   - all phase-3 rows of teams "102411" and "112501"
*   - round 7 of team "112203"
* (the original note refers to these teams as 2019102411, 2019112501 and
* 2019112203)
quietly summarize round_diff
replace round_diff = r(mean) if (team_id=="102411" | team_id=="112501") & phase3==1
replace round_diff = r(mean) if round==7 & team_id=="112203"

* Standardise to a z-score. The sign is flipped, so a higher predicted
* totalingred corresponds to a lower value of round_diff.
quietly summarize round_diff
quietly replace round_diff = -1*(round_diff - r(mean))/r(sd)
label variable round_diff "Round difficulty, z-score"


* Clean up: the helper variables were only needed for the prediction
drop temp_*
drop count team_n

* Empty variable with a row of asterisks as its label, placed immediately
* before recipe_CMF (presumably a visual divider in the variable list)
generate DIFFICULTY = .
label variable DIFFICULTY "************************************"
order DIFFICULTY, before(recipe_CMF)





* Control for phase 2 performance
*
* For each team and each pairing (player_1 = a, player_2 = b) listed below,
* phase2score on the phase-3 rows of that pairing is the team's mean totalingred
* over the phase-2 rows in which either player slot holds a or b.
* If no phase-2 row qualifies, r(mean) is missing and so is phase2score.

levelsof team_id , local(teams)

gen phase2score = .

foreach team of local teams {

	* Pairings written as "player_1 player_2":
	* 1-2, 3-4, 1-3, 1-4, 2-4, 2-3, 2-0, 3-0, 4-0, 6-2, 0-2
	foreach duo in "1 2" "3 4" "1 3" "1 4" "2 4" "2 3" "2 0" "3 0" "4 0" "6 2" "0 2" {

		local a : word 1 of `duo'
		local b : word 2 of `duo'

		* Phase-2 mean over rounds involving either member of the pairing
		qui sum totalingred if phase2==1 & team_id=="`team'" & (inlist(player_1,`a',`b') | inlist(player_2,`a',`b'))

		* Assign it to the phase-3 rows played by exactly this pairing
		qui replace phase2score = r(mean) if  phase3==1 & team_id=="`team'" & player_1==`a' & player_2==`b'
	}

}


* Round difficulty: indicators for at-or-below and above the median of round_diff
*
* Stata orders missing values above every number. An unguarded
* "round_diff > median" is therefore true on rows where round_diff is missing,
* which would classify them as high difficulty (and as not low). Both
* indicators are left missing on those rows instead.
qui sum round_diff, d

* Hold the median in a temporary scalar (full double precision) so the two
* generate statements do not rely on r() still being intact
tempname med
scalar `med' = r(p50)

gen round_diff_low =    round_diff <= `med' if !missing(round_diff)
gen round_diff_high =   round_diff > `med'  if !missing(round_diff)
	 


* 2.3 Save individual-level data for merge
* Writes round_diff together with its keys (team_id, pair, round) to a
* temporary file; the full dataset is restored afterwards.

preserve
	keep team_id pair round round_diff
	save "Data/temp data/indiv_difficulty.dta", replace
restore

* 2.4 Save ingredients for merge to individuals who are missing player-specific performance data
* Only teams "102411", "112203" and "112501" are written to this file.

preserve
	keep team_id pair player_1 player_2 round totalingred
	keep if team_id=="102411" | team_id=="112203" | team_id=="112501"
	save "Data/temp data/teamscoremerge.dta", replace
restore


* 3.0 Save
* Sort the observations by team, round and pair, then write the team-level
* temporary file (overwriting any earlier version)

sort team_id round pair

save "Data/temp data/team_temp.dta", replace

* End execution of this do-file here
exit




